Skip to content
Snippets Groups Projects
Commit bb7925e0 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

Merge branch '508-correct-headers-of-the-genomic-dataset-format' into 'devel_12.1.x'

Resolve "Correct headers of the genomic dataset format"

See merge request !395
parents 58716951 abe92207
No related branches found
No related tags found
2 merge requests: !412 "changes from 12.1.0~beta.1 into master", !395 "Resolve "Correct headers of the genomic dataset format""
Pipeline #6388 passed
......@@ -28,6 +28,7 @@ public class CustomDatabasePopulator implements DatabasePopulator {
flyway.setBaselineVersionAsString("12.1.0");
flyway.setOutOfOrder(true);
flyway.setBaselineOnMigrate(true);
flyway.setValidateOnMigrate(false);
flyway.migrate();
}
......
......@@ -151,12 +151,18 @@ public class ColorSchemaReader {
}
String[] columns = line.split("\t");
Map<ColorSchemaColumn, Integer> schemaColumns = new HashMap<ColorSchemaColumn, Integer>();
List<Pair<MiriamType, Integer>> customIdentifiers = new ArrayList<Pair<MiriamType, Integer>>();
Map<ColorSchemaColumn, Integer> schemaColumns = new HashMap<>();
List<Pair<MiriamType, Integer>> customIdentifiers = new ArrayList<>();
parseColumns(columns, schemaColumns, customIdentifiers, ColorSchemaType.GENETIC_VARIANT);
Integer colorColumn = schemaColumns.get(ColorSchemaColumn.COLOR);
Integer contigColumn = schemaColumns.get(ColorSchemaColumn.CONTIG);
if (contigColumn == null) {
contigColumn = schemaColumns.get(ColorSchemaColumn.CHROMOSOME);
}
Integer nameColumn = schemaColumns.get(ColorSchemaColumn.NAME);
if (nameColumn == null) {
nameColumn = schemaColumns.get(ColorSchemaColumn.GENE_NAME);
}
Integer modelNameColumn = schemaColumns.get(ColorSchemaColumn.MODEL_NAME);
Integer identifierColumn = schemaColumns.get(ColorSchemaColumn.IDENTIFIER);
Integer variantIdentifierColumn = schemaColumns.get(ColorSchemaColumn.VARIANT_IDENTIFIER);
......
......@@ -3,6 +3,8 @@ package lcsb.mapviewer.services.utils.data;
import java.util.HashSet;
import java.util.Set;
import org.primefaces.component.export.Exporter.ColumnType;
import lcsb.mapviewer.model.map.layout.ReferenceGenome;
import lcsb.mapviewer.model.map.layout.ReferenceGenomeType;
......@@ -20,6 +22,8 @@ public enum ColorSchemaColumn {
*/
NAME("name", new ColorSchemaType[] { ColorSchemaType.GENERIC, ColorSchemaType.GENETIC_VARIANT }), //
GENE_NAME("gene_name", new ColorSchemaType[] { ColorSchemaType.GENERIC, ColorSchemaType.GENETIC_VARIANT }), //
/**
* Name of the element.
*/
......@@ -56,7 +60,7 @@ public enum ColorSchemaColumn {
* Element identifier.
*/
ELEMENT_IDENTIFIER("elementIdentifier", new ColorSchemaType[] { ColorSchemaType.GENERIC }), //
/**
* Element identifier.
*/
......@@ -108,6 +112,8 @@ public enum ColorSchemaColumn {
*/
CONTIG("contig", new ColorSchemaType[] { ColorSchemaType.GENETIC_VARIANT }), //
CHROMOSOME("chromosome", new ColorSchemaType[] { ColorSchemaType.GENETIC_VARIANT }), //
ALLEL_FREQUENCY("allel_frequency", new ColorSchemaType[] { ColorSchemaType.GENETIC_VARIANT }), //
VARIANT_IDENTIFIER("variant_identifier", new ColorSchemaType[] { ColorSchemaType.GENETIC_VARIANT }), //
......
......@@ -102,6 +102,40 @@ public class ColorSchemaReaderTest extends ServiceTestFunctions {
}
}
/**
 * Reads {@code testFiles/coloring/gene_variants_new_naming.txt} with
 * {@link ColorSchemaReader} and expects exactly three color schemas.
 *
 * The file content is buffered in memory because it is consumed twice: once
 * to extract the header parameters and once by the reader itself.
 *
 * @throws Exception
 *           rethrown (after printing the stack trace) when reading or parsing
 *           fails
 */
@Test
public void testReadGeneVariantsWithNewNamingSchema() throws Exception {
  try {
    File f = new File("testFiles/coloring/gene_variants_new_naming.txt");
    ByteArrayOutputStream bao = new ByteArrayOutputStream();
    // try-with-resources guarantees the file handle is released even when a
    // read fails part-way through
    try (InputStream in = new FileInputStream(f)) {
      byte[] buff = new byte[8000];
      int bytesRead;
      while ((bytesRead = in.read(buff)) != -1) {
        bao.write(buff, 0, bytesRead);
      }
    }
    byte[] data = bao.toByteArray();

    // two independent streams over the same bytes: one for the header
    // parameters, one for the schema content
    ByteArrayInputStream bin = new ByteArrayInputStream(data);
    ColorSchemaReader reader = new ColorSchemaReader();
    Collection<ColorSchema> schemas = reader.readColorSchema(bin,
        TextFileUtils.getHeaderParametersFromFile(new ByteArrayInputStream(data)));
    assertNotNull(schemas);
    assertEquals(3, schemas.size());
  } catch (Exception e) {
    e.printStackTrace();
    throw e;
  }
}
@Test
public void testReadGeneVariantsSchemaWithAF() throws Exception {
try {
......
#TYPE=GENETIC_VARIANT
#GENOME_TYPE=UCSC
#GENOME_VERSION=hg38
position original_dna alternative_dna gene_name description color chromosome
10146 AC A DDX11L1 upstream #ff0000 chr1
10439 AC A DDX11L1;WASH7P upstream;downstream #ff0001 chr1
10583 G A DDX11L1;WASH7P upstream;downstream #ff0002 chr1
10611 C G DDX11L1;WASH7P upstream;downstream #ff0003 chr1
10616 CCGCCGTTGCAAAGGCGCGCCG C DDX11L1;WASH7P upstream;downstream #ff0004 chr1
10904 G A DDX11L1;WASH7P upstream;downstream #ff0005 chr1
12783 G A DDX11L1 intronic #ff0006 chr1
13116 T G DDX11L1 intronic #ff0007 chr1
13118 A G DDX11L1 intronic #ff0008 chr1
13143 G C DDX11L1 intronic #ff0009 chr1
13273 G C DDX11L1 intronic #ff0010 chr1
13302 C T DDX11L1 intronic #ff0011 chr1
13417 C CGAGA DDX11L1 exonic #ff0012 chr1
13550 G A DDX11L1 exonic #ff0013 chr1
13656 CAG C DDX11L1 UTR3 #ff0014 chr1
13668 G A DDX11L1 UTR3 #ff0015 chr1
13813 T G DDX11L1 UTR3 #ff0016 chr1
13838 C T DDX11L1 UTR3 #ff0017 chr1
13868 A G DDX11L1 UTR3 #ff0018 chr1
13896 C A DDX11L1 UTR3 #ff0019 chr1
13957 TC T DDX11L1 UTR3 #ff0020 chr1
14354 C A DDX11L1 UTR3 #ff0021 chr1
14464 A T WASH7P ncRNA_exonic #ff0022 chr1
14653 C T WASH7P ncRNA_exonic #ff0023 chr1
14671 G C WASH7P ncRNA_exonic #ff0024 chr1
14673 G C WASH7P ncRNA_exonic #ff0025 chr1
14677 G A WASH7P ncRNA_exonic #ff0026 chr1
14699 C G WASH7P ncRNA_exonic #ff0027 chr1
14907 A G WASH7P ncRNA_exonic #ff0028 chr1
14930 A G WASH7P ncRNA_exonic #ff0029 chr1
14933 G A WASH7P ncRNA_exonic #ff0030 chr1
14976 G A WASH7P ncRNA_exonic #ff0031 chr1
15118 A G WASH7P ncRNA_exonic #ff0032 chr1
15190 G A WASH7P ncRNA_exonic #ff0033 chr1
15211 T G WASH7P ncRNA_exonic #ff0034 chr1
15274 A G,T WASH7P ncRNA_exonic #ff0035 chr1
15447 A G WASH7P ncRNA_exonic #ff0036 chr1
15688 C T WASH7P ncRNA_exonic #ff0037 chr1
15820 G T WASH7P ncRNA_exonic #ff0038 chr1
15903 G GC WASH7P ncRNA_exonic #ff0039 chr1
16014 C T WASH7P ncRNA_exonic #ff0040 chr1
16068 T C WASH7P ncRNA_exonic #ff0041 chr1
16103 T G WASH7P ncRNA_exonic #ff0042 chr1
16206 T A WASH7P ncRNA_exonic #ff0043 chr1
16226 AG A WASH7P ncRNA_exonic #ff0044 chr1
16257 G C WASH7P ncRNA_exonic #ff0045 chr1
16288 C G WASH7P ncRNA_exonic #ff0046 chr1
16298 C T WASH7P ncRNA_exonic #ff0047 chr1
16378 T C WASH7P ncRNA_exonic #ff0048 chr1
16487 T C WASH7P ncRNA_exonic #ff0049 chr1
16495 G C WASH7P ncRNA_exonic #ff0050 chr1
16497 A G WASH7P ncRNA_exonic #ff0051 chr1
16534 C T WASH7P ncRNA_exonic #ff0052 chr1
16571 G A WASH7P ncRNA_exonic #ff0053 chr1
16682 G A WASH7P ncRNA_exonic #ff0054 chr1
16688 G A WASH7P ncRNA_exonic #ff0055 chr1
16719 T A WASH7P ncRNA_exonic #ff0056 chr1
16737 G T WASH7P ncRNA_exonic #ff0057 chr1
16742 G C WASH7P ncRNA_exonic #ff0058 chr1
16809 C G WASH7P ncRNA_intronic #ff0059 chr1
16841 G T WASH7P ncRNA_intronic #ff0060 chr1
16949 A C WASH7P ncRNA_exonic #ff0061 chr1
16963 G A WASH7P ncRNA_exonic #ff0062 chr1
16977 G A WASH7P ncRNA_exonic #ff0063 chr1
17020 G A WASH7P ncRNA_exonic #ff0064 chr1
17222 A G WASH7P ncRNA_intronic #ff0065 chr1
17385 G A MIR6859-1,MIR6859-2,MIR6859-3,MIR6859-4 ncRNA_exonic #ff0066 chr1
17408 C G MIR6859-1,MIR6859-2,MIR6859-3,MIR6859-4 ncRNA_exonic #ff0067 chr1
17538 C A WASH7P ncRNA_exonic #ff0068 chr1
17594 C T WASH7P ncRNA_exonic #ff0069 chr1
17614 G A WASH7P ncRNA_exonic #ff0070 chr1
17697 G C WASH7P ncRNA_exonic #ff0071 chr1
17730 C A WASH7P ncRNA_exonic #ff0072 chr1
17746 A G WASH7P ncRNA_exonic;splicing #ff0073 chr1
17765 G A WASH7P ncRNA_exonic #ff0074 chr1
17961 TG T WASH7P ncRNA_exonic #ff0075 chr1
19004 A G WASH7P ncRNA_exonic #ff0076 chr1
19322 C T WASH7P ncRNA_exonic #ff0077 chr1
19583 A G WASH7P ncRNA_exonic #ff0078 chr1
19592 T C WASH7P ncRNA_exonic #ff0079 chr1
19600 A T WASH7P ncRNA_exonic #ff0080 chr1
19604 T C WASH7P ncRNA_exonic #ff0081 chr1
19776 A G WASH7P ncRNA_intronic #ff0082 chr1
19788 C T WASH7P ncRNA_intronic #ff0083 chr1
19858 C T WASH7P ncRNA_intronic #ff0084 chr1
19942 G C WASH7P ncRNA_intronic #ff0085 chr1
20094 TAA T WASH7P ncRNA_intronic #ff0086 chr1
20098 CAG C WASH7P ncRNA_intronic #ff0087 chr1
20129 C T WASH7P ncRNA_intronic #ff0088 chr1
20136 T C WASH7P ncRNA_intronic #ff0089 chr1
20144 G A WASH7P ncRNA_intronic #ff0090 chr1
20191 T G WASH7P ncRNA_intronic #ff0091 chr1
20206 C T WASH7P ncRNA_intronic #ff0092 chr1
20207 G A WASH7P ncRNA_intronic #ff0093 chr1
20212 A G WASH7P ncRNA_intronic #ff0094 chr1
20227 T A WASH7P ncRNA_intronic #ff0095 chr1
20235 G A WASH7P ncRNA_intronic #ff0096 chr1
20245 G A WASH7P ncRNA_intronic #ff0097 chr1
20250 T C WASH7P ncRNA_intronic #ff0098 chr1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment