From d33e0a5bc19fde94b042198436925989ce1a2216 Mon Sep 17 00:00:00 2001 From: Piotr Gawron <piotr.gawron@uni.lu> Date: Thu, 16 Mar 2017 10:28:50 +0100 Subject: [PATCH] functionality to remove duplicates added --- .../parse/DuplicateRemoveParser.java | 48 ++++++++++++++++ .../parse/InvalidArgumentException.java | 14 +++++ .../appointment/parse/LihControlParser.java | 12 ++++ .../java/smash/appointment/parse/Main.java | 19 +++++-- .../java/smash/appointment/parse/Subject.java | 23 ++++---- .../smash/appointment/parse/SubjectDao.java | 24 ++++++++ .../src/main/resources/log4j.properties | 11 +++- .../smash/appointment/parse/AllTests.java | 1 + .../parse/DuplicateRemoveParserTest.java | 45 +++++++++++++++ .../parse/LihControlParserTest.java | 2 +- .../appointment/parse/SubjectDaoTest.java | 56 +++++++++++++++++++ appointment-import/testFiles/duplicates.txt | 2 + 12 files changed, 238 insertions(+), 19 deletions(-) create mode 100644 appointment-import/src/main/java/smash/appointment/parse/DuplicateRemoveParser.java create mode 100644 appointment-import/src/main/java/smash/appointment/parse/InvalidArgumentException.java create mode 100644 appointment-import/src/test/java/smash/appointment/parse/DuplicateRemoveParserTest.java create mode 100644 appointment-import/testFiles/duplicates.txt diff --git a/appointment-import/src/main/java/smash/appointment/parse/DuplicateRemoveParser.java b/appointment-import/src/main/java/smash/appointment/parse/DuplicateRemoveParser.java new file mode 100644 index 00000000..42fd910a --- /dev/null +++ b/appointment-import/src/main/java/smash/appointment/parse/DuplicateRemoveParser.java @@ -0,0 +1,48 @@ +package smash.appointment.parse; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; + +public class DuplicateRemoveParser { + private SubjectDao subjectDao; + + public void removeDuplicates(String filename) throws IOException { + try (BufferedReader br = new BufferedReader(new FileReader(filename))) { + 
String line; + while ((line = br.readLine()) != null) { + String tmp[] = line.split("\t"); + Subject subject = subjectDao.getByScreeningNumber(tmp[0]); + if (subject == null) { + throw new InvalidArgumentException("Cannot find subject with id: " + tmp[0]); + } + for (int i = 1; i < tmp.length; i++) { + Subject duplicate = subjectDao.getByScreeningNumber(tmp[i]); + + if (duplicate == null) { + throw new InvalidArgumentException("Cannot find subject with id: " + tmp[i]); + } + subjectDao.removeDuplicate(subject, duplicate, "DUPLICATES: " + tmp[0] + ", " + tmp[i]); + } + } + } + + } + + /** + * @return the subjectDao + * @see #subjectDao + */ + public SubjectDao getSubjectDao() { + return subjectDao; + } + + /** + * @param subjectDao + * the subjectDao to set + * @see #subjectDao + */ + public void setSubjectDao(SubjectDao subjectDao) { + this.subjectDao = subjectDao; + } +} diff --git a/appointment-import/src/main/java/smash/appointment/parse/InvalidArgumentException.java b/appointment-import/src/main/java/smash/appointment/parse/InvalidArgumentException.java new file mode 100644 index 00000000..7ae0e2d8 --- /dev/null +++ b/appointment-import/src/main/java/smash/appointment/parse/InvalidArgumentException.java @@ -0,0 +1,14 @@ +package smash.appointment.parse; + +public class InvalidArgumentException extends RuntimeException { + + public InvalidArgumentException(String string) { + super(string); + } + + /** + * + */ + private static final long serialVersionUID = 1L; + +} diff --git a/appointment-import/src/main/java/smash/appointment/parse/LihControlParser.java b/appointment-import/src/main/java/smash/appointment/parse/LihControlParser.java index a99ba461..d115b890 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/LihControlParser.java +++ b/appointment-import/src/main/java/smash/appointment/parse/LihControlParser.java @@ -101,6 +101,18 @@ public class LihControlParser extends SubjectParser { @Override protected String parseCountry(Row row) { + 
String zipCode = parseZipCode(row); + if (zipCode!=null) { + if (zipCode.startsWith("L")) { + return "Luxembourg"; + } else if (zipCode.startsWith("D")){ + return "Germany"; + } else if (zipCode.startsWith("F")){ + return "France"; + } else if (zipCode.startsWith("B")){ + return "Belgium"; + } + } return ""; } diff --git a/appointment-import/src/main/java/smash/appointment/parse/Main.java b/appointment-import/src/main/java/smash/appointment/parse/Main.java index 0d7c3423..be0d279e 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/Main.java +++ b/appointment-import/src/main/java/smash/appointment/parse/Main.java @@ -1,10 +1,8 @@ package smash.appointment.parse; -import java.text.SimpleDateFormat; +import java.io.IOException; import java.util.Calendar; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -31,6 +29,7 @@ public class Main { Option lihControls = Option.builder().required().argName("file").hasArg().desc("LIH controls").longOpt("lih-controls").build(); Option lihMappingControls = Option.builder().required().argName("file").hasArg().desc("LIH controls mapping").longOpt("lih-mapping").build(); Option redCap = Option.builder().required().argName("file").hasArg().desc("RedCap appointments").longOpt("red-cap").build(); + Option duplicates = Option.builder().required().argName("file").hasArg().desc("duplicates").longOpt("duplicates").build(); options.addOption(agenda); options.addOption(subjects); options.addOption(controls); @@ -38,6 +37,7 @@ public class Main { options.addOption(lihControls); options.addOption(lihMappingControls); options.addOption(redCap); + options.addOption(duplicates); CommandLineParser parser = new DefaultParser(); try { @@ -70,6 +70,9 @@ public class Main { .addSubject(subject, "[" + lihControlsFile + ";" + subject.getScreeningNumber() + ";" + subject.getName() + " " + subject.getSurname() + "]"); } + 
String duplicatesFile = line.getOptionValue("duplicates"); + removeDuplicates(duplicatesFile); + subjectDao.addSubject(Visit.UNKNOWN, ""); String agendaFile = line.getOptionValue("agenda"); @@ -86,7 +89,7 @@ public class Main { System.out.println("delete from web_appointment;"); System.out.println("delete from web_visit;"); System.out.println("delete from web_subject;"); - + SubjectSqlExporter subjectSqlExporter = new SubjectSqlExporter(); // logger.debug("SUBJECTS: "); for (Subject subject : subjectDao.getSubjects()) { @@ -107,7 +110,13 @@ public class Main { } } - private List<AppointmentEntry> processRedCapAppointments(String agendaFile) throws Exception{ + private void removeDuplicates(String duplicatesFile) throws IOException { + DuplicateRemoveParser parser = new DuplicateRemoveParser(); + parser.setSubjectDao(subjectDao); + parser.removeDuplicates(duplicatesFile); + } + + private List<AppointmentEntry> processRedCapAppointments(String agendaFile) throws Exception { RedcapParser parser = new RedcapParser(); parser.setSubjectDao(subjectDao); return parser.parse(agendaFile); diff --git a/appointment-import/src/main/java/smash/appointment/parse/Subject.java b/appointment-import/src/main/java/smash/appointment/parse/Subject.java index 3921389c..5e76186c 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/Subject.java +++ b/appointment-import/src/main/java/smash/appointment/parse/Subject.java @@ -289,8 +289,8 @@ public class Subject { public void setPhone1(String phone1) { if (phone1 != null && phone1.length() > 20) { logger.warn("Invalid phone. Ignoring: " + phone1); - } else { - this.phone1 = phone1; + } else if (phone1 != null) { + this.phone1 = phone1.replace(",", ""); } } @@ -310,8 +310,8 @@ public class Subject { public void setPhone2(String phone2) { if (phone2 != null && phone2.length() > 20) { logger.warn("Invalid phone. 
Ignoring: " + phone2); - } else { - this.phone2 = phone2; + } else if (phone2 != null) { + this.phone2 = phone2.replace(",", ""); } } @@ -331,8 +331,8 @@ public class Subject { public void setPhone3(String phone3) { if (phone3 != null && phone3.length() > 20) { logger.warn("Invalid phone. Ignoring: " + phone3); - } else { - this.phone3 = phone3; + } else if (phone3!=null){ + this.phone3 = phone3.replace(",", ""); } } @@ -477,11 +477,11 @@ public class Subject { setAddDate(getMergedValue("addDate", this.getAddDate(), subject.getAddDate(), errorPrefix)); setmPowerId(getMergedValue("mPowerId", this.getmPowerId(), subject.getmPowerId(), errorPrefix)); setType(getMergedValue("type", this.getType(), subject.getType(), errorPrefix)); - setResigned(this.isResigned()|| subject.isResigned()); - setDead(this.isDead()|| subject.isDead()); - setPostponed(this.isPostponed()|| subject.isPostponed()); + setResigned(this.isResigned() || subject.isResigned()); + setDead(this.isDead() || subject.isDead()); + setPostponed(this.isPostponed() || subject.isPostponed()); // override only when to be seen by flying team - if (subject.getToBeSeenAt().equals("F")) { + if (subject.getToBeSeenAt()!=null && subject.getToBeSeenAt().equals("F")) { setToBeSeenAt(subject.getToBeSeenAt()); } addLanguages(subject.getLanguages()); @@ -579,7 +579,8 @@ public class Subject { } /** - * @param postponed the postponed to set + * @param postponed + * the postponed to set * @see #postponed */ public void setPostponed(boolean postponed) { diff --git a/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java b/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java index 3cbfc396..8eba162f 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java +++ b/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java @@ -84,4 +84,28 @@ public class SubjectDao { } } + public void removeDuplicate(Subject subject1, Subject subject2, String 
errorPrefix) { + String screeningNumber1 = subject1.getScreeningNumber().trim(); + String screeningNumber2 = subject2.getScreeningNumber().trim(); + String screeningNumber = null; + if (screeningNumber1.toLowerCase().contains(screeningNumber2.toLowerCase())) { + screeningNumber = screeningNumber1; + } else if (screeningNumber2.toLowerCase().contains(screeningNumber1.toLowerCase())) { + screeningNumber = screeningNumber2; + } else { + screeningNumber = screeningNumber1 + "; " + screeningNumber2; + } + subject1.setScreeningNumber(screeningNumber); + subject2.setScreeningNumber(screeningNumber); + if (!subject1.getNdNumber().trim().isEmpty() && !subject2.getNdNumber().trim().isEmpty() + && !subject1.getNdNumber().trim().equals(subject2.getNdNumber().trim())) { + throw new InvalidArgumentException("Two different nd numbers: " + subject1.getNdNumber() + ", " + subject2.getNdNumber()); + } + + String remarks = (subject1.getRemarks()+"\n"+subject2.getRemarks()).trim(); + subject1.setRemarks(remarks); + subject1.update(subject2, errorPrefix); + subjects.remove(subject2); + } + } diff --git a/appointment-import/src/main/resources/log4j.properties b/appointment-import/src/main/resources/log4j.properties index c3a78a92..7ad74ade 100644 --- a/appointment-import/src/main/resources/log4j.properties +++ b/appointment-import/src/main/resources/log4j.properties @@ -1,8 +1,15 @@ -#Set root logger 's level and its appender to an appender called CONSOLE which is defined below. 
-log4j.rootLogger=fatal, CONSOLE +log4j.rootLogger=DEBUG, CONSOLE, R #Set the behavior of the CONSOLE appender log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Threshold=FATAL log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout log4j.appender.CONSOLE.layout.ConversionPattern=%d %5p [%t] (%F:%L) - %m%n #log4j.appender.CONSOLE.layout.ConversionPattern=%m%n + +#Set the behavior of the FILE appender +log4j.appender.R=org.apache.log4j.FileAppender +log4j.appender.R.File=log.txt +log4j.appender.R.layout=org.apache.log4j.PatternLayout +log4j.appender.R.layout.ConversionPattern=%d %5p [%t] (%F:%L) - %m%n +log4j.appender.R.Threshold=DEBUG diff --git a/appointment-import/src/test/java/smash/appointment/parse/AllTests.java b/appointment-import/src/test/java/smash/appointment/parse/AllTests.java index 1755cc4b..e9008b24 100644 --- a/appointment-import/src/test/java/smash/appointment/parse/AllTests.java +++ b/appointment-import/src/test/java/smash/appointment/parse/AllTests.java @@ -7,6 +7,7 @@ import org.junit.runners.Suite.SuiteClasses; @RunWith(Suite.class) @SuiteClasses({ AppointmentDaoTest.class, // CellParserTest.class, // + DuplicateRemoveParserTest.class, // LihControlMappingParserTest.class, // LihControlParserTest.class, // PrcControlParserTest.class, // diff --git a/appointment-import/src/test/java/smash/appointment/parse/DuplicateRemoveParserTest.java b/appointment-import/src/test/java/smash/appointment/parse/DuplicateRemoveParserTest.java new file mode 100644 index 00000000..7b078674 --- /dev/null +++ b/appointment-import/src/test/java/smash/appointment/parse/DuplicateRemoveParserTest.java @@ -0,0 +1,45 @@ +package smash.appointment.parse; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; + +public class DuplicateRemoveParserTest { + + @AfterClass + public static void tearDownAfterClass() throws Exception { + 
} + + @Before + public void setUp() throws Exception { + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void test() throws IOException { + DuplicateRemoveParser parser = new DuplicateRemoveParser(); + + SubjectDao subjectDao = new SubjectDao(); + + subjectDao.addSubject(new Subject("First", "Last", "","P-113"), null); + subjectDao.addSubject(new Subject("First", "Last", "","P-114"), null); + subjectDao.addSubject(new Subject("A", "B", "","P-115"), null); + subjectDao.addSubject(new Subject("C", "D", "","P-116"), null); + + assertEquals(4, subjectDao.getSubjects().size()); + + parser.setSubjectDao(subjectDao); + parser.removeDuplicates("testFiles/duplicates.txt"); + + assertEquals(2, subjectDao.getSubjects().size()); + } + +} diff --git a/appointment-import/src/test/java/smash/appointment/parse/LihControlParserTest.java b/appointment-import/src/test/java/smash/appointment/parse/LihControlParserTest.java index 05845e05..9cd3eb20 100644 --- a/appointment-import/src/test/java/smash/appointment/parse/LihControlParserTest.java +++ b/appointment-import/src/test/java/smash/appointment/parse/LihControlParserTest.java @@ -49,7 +49,7 @@ public class LihControlParserTest extends TestBase { assertEquals("11, Rue blabla", subject.getAddress()); assertEquals("L-3322", subject.getZipCode()); assertEquals("Luxembourg", subject.getCity()); - assertEquals("", subject.getCountry()); + assertEquals("Luxembourg", subject.getCountry()); assertEquals("123456789", subject.getPhone1()); assertEquals("321654", subject.getPhone2()); assertEquals("", subject.getPhone3()); diff --git a/appointment-import/src/test/java/smash/appointment/parse/SubjectDaoTest.java b/appointment-import/src/test/java/smash/appointment/parse/SubjectDaoTest.java index 66fc18c5..0585aa0d 100644 --- a/appointment-import/src/test/java/smash/appointment/parse/SubjectDaoTest.java +++ b/appointment-import/src/test/java/smash/appointment/parse/SubjectDaoTest.java @@ -1,6 +1,8 @@ package 
smash.appointment.parse; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import org.junit.After; import org.junit.AfterClass; @@ -29,4 +31,58 @@ public class SubjectDaoTest { assertEquals("Piotr", subjectDao.getSubjects().get(0).getName()); } + @Test + public void testDuplicateRem() throws Exception { + Subject subject1 = new Subject("A", "B", "C", "D"); + subject1.setToBeSeenAt(""); + Subject subject2 = new Subject("A", "B", "", "E"); + subject2.setToBeSeenAt(""); + SubjectDao subjectDao = new SubjectDao(); + + subjectDao.addSubject(subject1, null); + subjectDao.addSubject(subject2, null); + subjectDao.removeDuplicate(subject1, subject2, null); + + assertEquals(1, subjectDao.getSubjects().size()); + assertTrue(subject1.getScreeningNumber().contains("D")); + assertTrue(subject1.getScreeningNumber().contains("E")); + assertEquals("C", subject1.getNdNumber()); + } + + @Test + public void testDuplicateRem2() throws Exception { + Subject subject1 = new Subject("A", "B", "C", "D"); + subject1.setToBeSeenAt(""); + Subject subject2 = new Subject("A", "B", "", "E"); + subject2.setToBeSeenAt(""); + SubjectDao subjectDao = new SubjectDao(); + + subjectDao.addSubject(subject1, null); + subjectDao.addSubject(subject2, null); + subjectDao.removeDuplicate(subject2, subject1, null); + + assertEquals(1, subjectDao.getSubjects().size()); + assertTrue(subject2.getScreeningNumber().contains("D")); + assertTrue(subject2.getScreeningNumber().contains("E")); + assertEquals("C", subject2.getNdNumber()); + } + + @Test + public void testDuplicateRem3() throws Exception { + try { + Subject subject1 = new Subject("A", "B", "C", "D"); + subject1.setToBeSeenAt(""); + Subject subject2 = new Subject("A", "B", "X", "E"); + subject2.setToBeSeenAt(""); + SubjectDao subjectDao = new SubjectDao(); + + subjectDao.addSubject(subject1, null); + subjectDao.addSubject(subject2, null); + subjectDao.removeDuplicate(subject2, 
subject1, null); + + fail("Exception expected"); + } catch (InvalidArgumentException e) { + } + } + } diff --git a/appointment-import/testFiles/duplicates.txt b/appointment-import/testFiles/duplicates.txt new file mode 100644 index 00000000..3c7625d5 --- /dev/null +++ b/appointment-import/testFiles/duplicates.txt @@ -0,0 +1,2 @@ +P-113 P-114 +P-115 P-116 \ No newline at end of file -- GitLab