# Copyright 2006 Instituto de Investigaciones Dr. José María Luis Mora /
# Instituto de Investigaciones Estéticas.
# See COPYING.txt and LICENSE.txt for redistribution conditions.
#
# D.R. 2006 Instituto de Investigaciones Dr. José María Luis Mora /
# Instituto de Investigaciones Estéticas.
# Véase COPYING.txt y LICENSE.txt para los términos bajo los cuales
# se permite la redistribución.

module KRLogic
  # Loads image thumbnails and CSV data files into the KR model through the
  # Java classes imported below. Entry point: DataImport.process_csv.
  module DataImport
    include RJBM::JAccess

    j_import('mx.org.pescador.krmodel.graphelements.Graph',
             'mx.org.pescador.krmodel.KRModel',
             'mx.org.pescador.krmodel.operations.DOModifier',
             'mx.org.pescador.krmodel.operations.ComplexValModifier',
             'mx.org.pescador.Lang',
             'mx.org.pescador.krmodel.ruleappliers.CompVectorValueManager',
             'mx.org.pescador.krmodel.ruleappliers.TextBPVManager',
             'mx.org.pescador.krmodel.ruleappliers.KRRelationsManager')

    require 'csv'
    require 'RMagick'

    # One CSV data file held in memory: the first row is the header, every
    # following row describes one subject whose identifier is in the first
    # cell. If the second header cell is "class", the second cell of each
    # row is treated as a class identifier instead of data.
    #
    # This is a base class. Subclasses must set @realm and @rule_set BEFORE
    # calling super (CSVColumn reads rule_set while the file is being
    # constructed) and must provide has_d_objs and setup_subjects.
    class CSVFile
      attr_reader(:columns, :rule_set, :realm, :name, :has_class_data)

      # Reads and parses the whole file.
      #
      # path      - directory containing the file (with or without a
      #             trailing separator).
      # file_name - name of the CSV file inside path.
      #
      # Raises RuntimeError when the file has no header row or when a row
      # does not have as many data cells as there are columns.
      def initialize(path, file_name)
        @name = file_name

        # CSV.foreach yields each parsed row. CSV.open with a block only did
        # that in the Ruby 1.8 library; on later versions it yields the CSV
        # object itself, which silently broke the row collection.
        data_tmp = Array.new
        CSV.foreach(File.join(path, @name)) do |row|
          data_tmp << row
        end

        header = data_tmp[0]
        if header.nil? || header.empty?
          raise "CSV file has no header row: #{@name}"
        end

        @has_class_data = (header[1] == "class")
        columns_start = @has_class_data ? 2 : 1

        # Columns must exist before the rows are built: each CSVRow checks
        # its cell count against columns.size.
        @columns = header[columns_start..-1].map do |heading|
          CSVColumn.new(heading, self)
        end

        @rows = data_tmp[1..-1].map do |data_row|
          CSVRow.new(data_row, self)
        end
      end

      # One data row of a CSVFile: a subject identifier plus one raw cell
      # per column of the file.
      class CSVRow
        attr_reader(:id)

        # raw_data - array of cells for this row; raw_data[0] is the id.
        # in_file  - the CSVFile this row belongs to.
        #
        # Raises RuntimeError when the number of data cells differs from the
        # number of columns in in_file.
        def initialize(raw_data, in_file)
          @in_file = in_file
          @id = raw_data[0]

          # NOTE(review): the class column is only skipped for files with
          # has_d_objs; a non-d-obj file with a "class" header column would
          # always fail the size check below - confirm that is intended.
          data_start = 1
          if in_file.has_d_objs
            if @in_file.has_class_data
              # TODO: unify all code that parses these uri things
              class_id_parts = raw_data[1].split(":")
              class_realm = KRModel.getRealm(class_id_parts[0])
              class_local_uri_part = class_id_parts[1]
              @d_obj_class = Graph.getConcreteCls(class_local_uri_part, class_realm.ont)
              data_start = 2
            else
              @d_obj_class = @in_file.rep_org_grp.groupDomain
            end
          end

          # Array() turns the nil produced by slicing an empty row into [],
          # so a blank row reaches the explicit error below.
          @raw_data = Array(raw_data[data_start..-1])

          if @raw_data.size != @in_file.columns.size
            raise "Row has the wrong number of columns, in: " + @in_file.name
          end
        end

        # Sets the subject for a row of a describable-object file. An id
        # that starts with non-digit characters followed by a dot is looked
        # up as an existing SOC and added to the file's rep_org_grp; any
        # other id gets a newly generated describable object.
        def setup_d_obj_subject
          if @id =~ /^\D+\./
            assign_soc_subject
            @in_file.rep_org_grp.add(@subject)
          else
            make_d_obj_subject
          end
        end

        # Generates a generic describable object for this row's id and
        # stores it as the subject.
        def make_d_obj_subject
          @subject = DOModifier.generateGenericDO(@id, @d_obj_class, @in_file.rep_org_grp)
        end

        # Generates a complex value of the given data type for this row's id
        # and stores it as the subject.
        def make_c_val_subject(data_type)
          @subject = ComplexValModifier.generateComplexVal(@id, Graph.repository, data_type)
        end

        # Looks up the SOC named by an id of the form "<realm>.<soc>" and
        # stores it as the subject.
        def assign_soc_subject
          id_parts = @id.split(".")
          soc_realm = KRModel.getRealm(id_parts[0])
          @subject = soc_realm.repositoryArea.soc(id_parts[1])
        end

        # Returns the data writer of this row's subject. The subject must
        # have been set by one of the methods above.
        def data_writer
          @subject.dataWriter
        end

        # Submits every non-"null" cell of the row to the data writer and
        # then executes the writer. The cell syntax selects the cell type:
        #   <...>  describable object, [...] complex value, {...} SOC,
        #   anything else a literal.
        # Returns whatever the writer's execute call returns.
        def write_data
          @raw_data.each_with_index do |raw_cell, i|
            next if raw_cell == "null"

            column = @in_file.columns[i]
            cell_class =
              case raw_cell
              when /^<(\w|\-|_|\.)+>$/ then CSVDescObjCell
              when /^\[(\w|\-|\:|_|\.)+\]$/ then CSVComplValCell
              when /^\{(\w|\-|_|\.)+\}$/ then CSVSOCCell
              else CSVLiteralCell
              end
            cell_class.new(raw_cell, self, column, @in_file).submit_data
          end
          # p @raw_data.inspect
          data_writer.execute
          # TODO: check for successful message
        end
      end

      # One column of a CSVFile; resolves the heading to a rule of the
      # file's rule set.
      class CSVColumn
        attr_reader(:krr_rule)

        # heading - header cell text, passed to baseStructureKRRelRule.
        # in_file - the CSVFile this column belongs to.
        def initialize(heading, in_file)
          @in_file = in_file
          @krr_rule = @in_file.rule_set.baseStructureKRRelRule(heading)
        end
      end

      # Base class for cells. Subclasses set @obj in their constructor.
      class CSVDataCell
        def initialize(in_row, in_col, in_file)
          @in_row = in_row
          @in_col = in_col
          @in_file = in_file
        end

        # Submits this cell's object to the row's data writer under the
        # column's rule.
        def submit_data
          @in_row.data_writer.submitNew(@obj, @in_col.krr_rule)
        end
      end

      # A literal cell of the form "text"@lang^^prefix:type, where the
      # @lang part is optional.
      class CSVLiteralCell < CSVDataCell
        # Raises RuntimeError when raw_data is nil, when no data type
        # follows "^^", or when no quoted text can be extracted.
        def initialize(raw_data, in_row, in_col, in_file)
          super(in_row, in_col, in_file)
          if raw_data.nil?
            raise "Nil raw data in literal cell #{in_row.id} #{in_col.krr_rule.fullIdent}"
          end

          parts = raw_data.split("^^")
          literal_part = parts[0].to_s
          d_type_str = parts[1]

          # Only look for the language tag after the closing quote, so an
          # '@' inside the quoted text (e.g. an e-mail address) is not
          # mistaken for the tag separator.
          closing_quote = literal_part.rindex('"')
          tag_source = closing_quote ? literal_part[(closing_quote + 1)..-1] : literal_part
          if tag_source =~ /@/
            lang = Lang.get(tag_source.split('@')[1])
          else
            lang = Lang.NONE
          end
          literal_val = literal_part.slice(/"(.*)"/, 1)

          raise "No data type found #{raw_data} #{@in_row.id}" unless d_type_str

          d_type_parse_result = Graph.parsePrettyURI(d_type_str)
          d_type_prefix = d_type_parse_result.prefix
          d_type_local_uri_part = d_type_parse_result.localURIPart
          d_type_graph_part = @in_file.realm.getGraphPart(d_type_prefix)
          d_type = Graph.getFundDataType(d_type_local_uri_part, d_type_graph_part)

          if literal_val.nil?
            raise "null literal val extracted for " + raw_data
          end
          @obj = @in_row.data_writer.newFundValue(literal_val, lang, d_type)
        end
      end

      # A cell of the form <id> that refers to a describable object.
      class CSVDescObjCell < CSVDataCell
        def initialize(raw_data, in_row, in_col, in_file)
          super(in_row, in_col, in_file)
          full_id = raw_data.slice(/^<((\w|\-|_|\.)+)>$/,1)
          # We discard realm and soc because we can get the node without
          # them; any errors in those parts of identifiers will go
          # undetected. This is also a hack to deal with inconsistencies in
          # our CSV syntax (some nodes have a "." in the local URI part):
          # when the last dot-separated piece is shorter than 5 characters,
          # the full id is used instead.
          local_uri_part = full_id.split('.')[-1]
          if local_uri_part.length < 5
            local_uri_part = full_id
          end
          @obj = Graph.getDescribableObj(local_uri_part, Graph.repository)
        end
      end

      # A cell of the form {realm.soc} that refers to a SOC.
      class CSVSOCCell < CSVDataCell
        def initialize(raw_data, in_row, in_col, in_file)
          super(in_row, in_col, in_file)
          full_id = raw_data.slice(/^\{((\w|\-|_|\.)+)\}$/,1)
          id_parts = full_id.split('.')
          soc_realm = KRModel.getRealm(id_parts[0])
          @obj = soc_realm.repositoryArea.soc(id_parts[1])
        end
      end

      # A cell of the form [id] that refers to a complex value.
      class CSVComplValCell < CSVDataCell
        def initialize(raw_data, in_row, in_col, in_file)
          super(in_row, in_col, in_file)
          full_id = raw_data.slice(/\[((\w|\-|\:|_|\.)+)\]/,1)
          # We discard realm and value-of-data-type because we can get the
          # node without them; any errors in those parts of identifiers
          # will go undetected.
          internal_id = full_id.split('.')[-1]
          # A hack to take into account inconsistencies in our data: when
          # the last piece is shorter than 5 characters, use the full id.
          if internal_id.length < 5
            internal_id = full_id
          end
          @obj = Graph.getComplexValue(internal_id)
        end
      end

      # Builds the CSVFile subclass that matches the file name: names
      # containing "--" are complex-value files, names containing "SOC" are
      # SOC files, everything else is a describable-object file.
      def self.new_file(path, file_name)
        if file_name =~ /--/
          CSVComplexValueFile.new(path, file_name)
        elsif file_name =~ /SOC/
          CSVSOCFile.new(path, file_name)
        else
          CSVDescribableObjectFile.new(path, file_name)
        end
      end

      # Writes the data of every row.
      # We assume that the subjects have already been loaded
      # (see setup_subjects in the subclasses).
      def write_data
        @rows.each do |row|
          # CustomLogger.info("Writing row "+ row.id)
          row.write_data
        end
      end
    end

    # Common base for the file types that expose rep_org_grp.
    class CSVGeneralDObjFile < CSVFile
      attr_reader(:rep_org_grp)
    end

    # File whose rows are describable objects inside a SOC. The file name
    # is split on "." into realm id (first part) and SOC id (second part).
    class CSVDescribableObjectFile < CSVGeneralDObjFile
      def initialize(path, file_name)
        parts_file_name = file_name.split('.')
        @realm = KRModel.getRealm(parts_file_name[0])
        soc_id = parts_file_name[1]
        @rep_org_grp = @realm.repositoryArea.soc(soc_id)
        @rule_set = @rep_org_grp.membersRSBinding
        super(path, file_name)
      end

      def has_d_objs
        true
      end

      # Here we are loading the resources that are going to be inside a soc:
      # creates (or looks up) the subject of every row by its id.
      def setup_subjects
        @rows.each do |row|
          row.setup_d_obj_subject
        end
      end
    end

    # File whose rows are SOCs. The file name is split on "."; the second
    # part is the realm id and the third part the rule set id.
    class CSVSOCFile < CSVGeneralDObjFile
      def initialize(path, file_name)
        parts_file_name = file_name.split('.')
        @realm = KRModel.getRealm(parts_file_name[1])
        @rule_set = @realm.ruleSet(parts_file_name[2])
        super(path, file_name)
      end

      def has_d_objs
        false
      end

      # Here we are fetching the appropriate SOCs that are the subjects of
      # each row.
      def setup_subjects
        @rows.each do |row|
          row.assign_soc_subject
        end
      end
    end

    # File whose rows are complex values. The file name has the form
    # "<realm>--<data type>.<rest>".
    class CSVComplexValueFile < CSVFile
      def initialize(path, file_name)
        parts_file_name = file_name.split('--')
        @realm = KRModel.getRealm(parts_file_name[0])
        data_type_id = parts_file_name[1].split('.')[0]
        @data_type = Graph.getDataType(data_type_id, @realm.ont)
        @rule_set = @data_type.rsForValues
        super(path, file_name)
      end

      def has_d_objs
        false
      end

      # Creates the complex-value subject of every row by its id.
      def setup_subjects
        @rows.each do |row|
          row.make_c_val_subject(@data_type)
        end
      end
    end

    # Runs the whole import:
    #   1. registers every .jpg in AppConfig.img_thumb_dir with its pixel
    #      dimensions;
    #   2. first pass over every .csv in AppConfig.csv_dir: parse the file
    #      and set up the subject of each row;
    #   3. second pass: write the data of every row (done after ALL files
    #      had their subjects set up, since cells may refer to subjects
    #      from other files);
    #   4. runs the post-processing steps of the Java model.
    def self.process_csv
      path = AppConfig.img_thumb_dir
      # Dir.entries returns the names without leaving a Dir handle open.
      img_thumb_names = Dir.entries(path).select { |f| f =~ /\.jpg\z/ }
      img_thumb_names.each do |img_thumb_name|
        img = Magick::Image.read(File.join(path, img_thumb_name)).first
        base_name = img_thumb_name.sub(/\.jpg\z/, '')
        ImgFileManager.addImgFile(base_name, img.columns, img.rows)
      end

      path = AppConfig.csv_dir
      csv_file_names = Dir.entries(path).select { |f| f =~ /\.csv\z/ }
      files = csv_file_names.map do |csv_file_name|
        CustomLogger.info("Performing first pass on csv file: #{csv_file_name}")
        file = CSVFile.new_file(path, csv_file_name)
        file.setup_subjects
        file
      end

      files.each do |file|
        CustomLogger.info("Performing second pass on csv file: #{file.name}")
        file.write_data
      end

      # TODO: wrap the execution of following general post-graphing processes in
      # some method in Java code
      CustomLogger.info("Generating TextBPVs")
      TextBPVManager.generateCued

      CustomLogger.info("Generating comp vector values")
      CompVectorValueManager.computeCued

      # TODO: add here the caching of DOs that values are associated with in the values themselves?
    end
  end
end