[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [metrics-tasks/master] Change combined database file header (#6471).
commit d84ccdd2a0ad3d3f7e0aed2c65dad3b5e7ad81d0
Author: Karsten Loesing <karsten.loesing@xxxxxxx>
Date: Mon Nov 5 20:57:06 2012 -0500
Change combined database file header (#6471).
File header now contains database dates and file names.
---
.../src/org/torproject/task6471/DatabaseImpl.java | 9 ++--
.../torproject/task6471/DatabaseImporterImpl.java | 55 ++++++++------------
.../task6471/DatabasePerformanceExample.java | 2 +-
3 files changed, 27 insertions(+), 39 deletions(-)
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
index 364f0c7..0338376 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImpl.java
@@ -71,7 +71,7 @@ public class DatabaseImpl implements Database {
protected SortedSet<Integer> databaseDates = new TreeSet<Integer>();
/**
- * Database file names.
+ * Database dates and file names, formatted as yyyymmdd!filename.
*/
protected SortedSet<String> databaseFileNames = new TreeSet<String>();
@@ -243,11 +243,10 @@ public class DatabaseImpl implements Database {
if (line.startsWith("!")) {
/* First read file header containing database dates. */
- String databaseFileName = line.substring(1);
- String databaseDateString =
- databaseFileName.substring(databaseFileName.length() - 8);
+ String[] parts = line.substring(1).split("!");
+ this.databaseFileNames.add(line.substring(1));
+ String databaseDateString = parts[0];
int dbDate = convertDateStringToNumber(databaseDateString);
- this.databaseFileNames.add(databaseFileName);
this.databaseDates.add(dbDate);
} else {
diff --git a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
index 2a6c203..0f7df77 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabaseImporterImpl.java
@@ -6,7 +6,6 @@ import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
-import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -17,7 +16,6 @@ import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.Stack;
-import java.util.TimeZone;
import java.util.TreeMap;
public class DatabaseImporterImpl extends DatabaseImpl
@@ -55,12 +53,6 @@ public class DatabaseImporterImpl extends DatabaseImpl
}
Collections.sort(allFiles, Collections.reverseOrder());
for (File file : allFiles) {
- String databaseFileName = file.getName();
- if (this.databaseFileNames.contains(databaseFileName)) {
- /* We already imported this file while loading combined databases
- * from disk. */
- continue;
- }
if (!this.importRegionalRegistryStatsFile(file)) {
allImportsSuccessful = false;
}
@@ -124,8 +116,8 @@ public class DatabaseImporterImpl extends DatabaseImpl
int databaseDate = convertDateStringToNumber(databaseDateString);
long startAddress = convertAddressStringToNumber(startAddressString);
long endAddress = startAddress + addresses - 1L;
- this.addRange(databaseFileName, databaseDate, startAddress,
- endAddress, code);
+ this.addDatabase(databaseFileName, databaseDate);
+ this.addRange(databaseDate, startAddress, endAddress, code);
}
public boolean importGeoLiteCityFileOrDirectory(String path) {
@@ -178,13 +170,11 @@ public class DatabaseImporterImpl extends DatabaseImpl
boolean importGeoLiteCityBlocksAndLocationFiles(File blocksFile,
File locationFile) {
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
long lastModifiedMillis = blocksFile.lastModified();
- String databaseFileName = blocksFile.getName() + " "
- + locationFile.getName() + " "
- + dateFormat.format(lastModifiedMillis);
+ String databaseFileName = blocksFile.getName() + "+"
+ + locationFile.getName();
int databaseDate = (int) (lastModifiedMillis / 86400000);
+ this.addDatabase(databaseFileName, databaseDate);
try {
/* Parse location file first and remember country codes for given
* locations. */
@@ -221,8 +211,7 @@ public class DatabaseImporterImpl extends DatabaseImpl
break;
}
String code = locations.get(location);
- this.addRange(databaseFileName, databaseDate, startAddress,
- endAddress, code);
+ this.addRange(databaseDate, startAddress, endAddress, code);
}
br.close();
} catch (IOException e) {
@@ -257,12 +246,10 @@ public class DatabaseImporterImpl extends DatabaseImpl
}
private boolean importGeoIPASNum2File(File file) {
- SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
- dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
long lastModifiedMillis = file.lastModified();
- String databaseFileName = file.getName() + " "
- + dateFormat.format(lastModifiedMillis);
+ String databaseFileName = file.getName();
int databaseDate = (int) (lastModifiedMillis / 86400000);
+ this.addDatabase(databaseFileName, databaseDate);
try {
BufferedReader br = new BufferedReader(new FileReader(file));
String line;
@@ -275,8 +262,7 @@ public class DatabaseImporterImpl extends DatabaseImpl
/* Don't import illegal range. */
continue;
}
- this.addRange(databaseFileName, databaseDate, startAddress,
- endAddress, code);
+ this.addRange(databaseDate, startAddress, endAddress, code);
}
br.close();
this.repairTree();
@@ -292,6 +278,17 @@ public class DatabaseImporterImpl extends DatabaseImpl
private int rangeImports = 0, rangeImportsKeyLookups = 0;
/**
+ * Add new database date and file name if we didn't know them yet. */
+ void addDatabase(String databaseFileName, int databaseDate) {
+ if (!this.databaseDates.contains(databaseDate)) {
+ this.databaseDates.add(databaseDate);
+ this.addedDatabaseDate = databaseDate;
+ }
+ this.databaseFileNames.add(convertDateNumberToString(databaseDate)
+ + "!" + databaseFileName);
+ }
+
+ /**
* Add a single address and date range to the tree, which may require
* splitting up existing ranges.
*
@@ -300,18 +297,10 @@ public class DatabaseImporterImpl extends DatabaseImpl
* is called prior to any lookupAddress() calls. No further checks are
* performed that the tree is repaired before looking up an address.
*/
- void addRange(String databaseFileName, int databaseDate,
- long startAddress, long endAddress, String code) {
+ void addRange(int databaseDate, long startAddress, long endAddress,
+ String code) {
this.rangeImports++;
- /* Add new database date and file name if we didn't know them yet,
- * and note that we need to repair the tree after importing. */
- if (!this.databaseDates.contains(databaseDate)) {
- this.databaseDates.add(databaseDate);
- this.addedDatabaseDate = databaseDate;
- }
- this.databaseFileNames.add(databaseFileName);
-
/* We might have to split existing ranges or the new range before
* adding it to the tree, and we might have to remove existing ranges.
* We shouldn't mess with the tree directly while iterating over it,
diff --git a/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java b/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
index 5f8573a..bdfb140 100644
--- a/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
+++ b/task-6471/java/src/org/torproject/task6471/DatabasePerformanceExample.java
@@ -19,7 +19,7 @@ import java.util.TreeSet;
public class DatabasePerformanceExample {
public static void main(String[] args) throws IOException {
- File testCasesCsvFile = new File("test-cases.csv");
+ File testCasesCsvFile = new File("test-cases-2007-10-2012-09.csv");
if (!testCasesCsvFile.exists()) {
System.out.print("Generating test cases... ");
long startMillis = System.currentTimeMillis();
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits