2023-10-13 00:04:20 +00:00
|
|
|
namespace :dataset do
  desc "Ingest the English and French parts-of-speech and dictionary datasets into the database"
  task ingest: :environment do
    # Language name => dataset directory name (the ingest helpers read files
    # from db/<directory>/). Hash order is insertion order, so English is
    # processed before French.
    datasets = {
      'English' => 'dataset_en',
      'French' => 'dataset_fr'
    }

    datasets.each do |language_name, dataset|
      language = create_language(language_name)
      ingest_pos(language, dataset)
      ingest_dictionary(language, dataset)
    end

    puts "Ingest complete."
  end
end
|
|
|
|
|
2023-12-07 16:31:54 +00:00
|
|
|
# Looks up the Language record called +name+, creating it when none exists.
#
# The record is always returned, even when it already existed, because the
# ingest helpers in this file read +language.id+ from the value they are given;
# returning nil for an existing language would make them fail.
#
# @param name [String] language name to look up or create
# @return [Language] the existing or newly created record
def create_language(name)
  existing = Language.find_by(name: name)

  if existing
    puts "Language #{name} already exists! Skipping step."
    return existing
  end

  Language.create(name: name)
end
|
|
|
|
|
2023-12-07 16:31:54 +00:00
|
|
|
# Loads db/<dataset>/toki-partsofspeech.json and creates one PartOfSpeech row
# per entry, tagged with the given language.
#
# Does nothing (beyond printing a notice) when the language already has
# PartOfSpeech rows.
#
# @param language [Language] record whose +id+ is stored on every created row
# @param dataset [String] directory name under db/ that holds the JSON file
# @return [nil]
def ingest_pos(language, dataset)
  if PartOfSpeech.where(language_id: language.id).exists?
    puts "Parts of speech data for this language already exists in table! Aborting."
    return
  end

  parts_of_speech = JSON.parse(File.read("db/#{dataset}/toki-partsofspeech.json"))

  # All-or-nothing: if an error interrupts the ingest, no partial rows are
  # left behind for the guard above to mistake for a finished ingest.
  PartOfSpeech.transaction do
    parts_of_speech.each do |entry|
      PartOfSpeech.create(pos: entry['pos'], definition: entry['definition'], language_id: language.id)
    end
  end

  puts "Parts of speech ingest complete."
end
|
|
|
|
|
2023-12-07 16:31:54 +00:00
|
|
|
# Loads db/<dataset>/toki-dictionary.json and stores its definitions for the
# given language.
#
# Each JSON entry names a word and a list of definitions. The Word row is
# looked up by its text and created only if missing; every definition is then
# created through +word.definitions+ with the language's id.
#
# Does nothing (beyond printing a notice) when the language already has
# Definition rows.
#
# @param language [Language] record whose +id+ is stored on every definition
# @param dataset [String] directory name under db/ that holds the JSON file
# @return [nil]
def ingest_dictionary(language, dataset)
  if Definition.where(language_id: language.id).exists?
    puts "Dictionary data for this language already exists in table! Aborting."
    return
  end

  dictionary = JSON.parse(File.read("db/#{dataset}/toki-dictionary.json"))

  # All-or-nothing: if an error interrupts the ingest, no partial rows are
  # left behind for the guard above to mistake for a finished ingest.
  Definition.transaction do
    dictionary.each do |entry|
      word = Word.find_or_create_by(word: entry['word'])

      # An entry without a "definitions" key contributes the word only.
      (entry['definitions'] || []).each do |definition|
        word.definitions.create(
          pos: definition['pos'],
          definition: definition['definition'],
          language_id: language.id
        )
      end
    end
  end

  puts "Dictionary ingest complete."
end
|