2023-10-13 00:04:20 +00:00
|
|
|
namespace :dataset do
  # Runs both ingest helpers in order: parts of speech first, then the
  # dictionary. Depends on the :environment task, so that task runs first.
  desc "Ingest the parts-of-speech and dictionary datasets from db/dataset_en into the database"
  task ingest: :environment do
    # Each helper prints a notice and returns early when its table already
    # holds rows, so the final message is printed either way.
    ingest_pos
    ingest_dictionary
    puts "Ingest complete."
  end
end
|
|
|
|
|
2023-10-13 02:24:31 +00:00
|
|
|
# Loads the parts-of-speech dataset into the PartOfSpeech table.
#
# Reads db/dataset_en/toki-partsofspeech.json (a path relative to the current
# working directory), iterates the parsed JSON, and creates one PartOfSpeech
# record per entry from that entry's "pos" and "definition" keys.
#
# When the table already has rows, only a notice is printed and nothing is
# written.
#
# The inserts run inside a single transaction and use create!, so a record
# that cannot be saved raises and undoes the whole batch. Without that, a
# half-finished ingest would leave rows behind and the guard below would then
# refuse every re-run.
# NOTE(review): assumes PartOfSpeech is an ActiveRecord model (exists?,
# transaction, create!) -- confirm.
def ingest_pos
  if PartOfSpeech.exists?
    puts "Parts of speech data already exists in table! Aborting."
    return
  end

  parts_of_speech = JSON.parse(File.read('db/dataset_en/toki-partsofspeech.json'))

  PartOfSpeech.transaction do
    parts_of_speech.each do |pos|
      PartOfSpeech.create!(pos: pos['pos'], definition: pos['definition'])
    end
  end

  puts "Parts of speech ingest complete."
end
|
|
|
|
|
|
|
|
# Loads the dictionary dataset into the Word table and its definitions.
#
# Reads db/dataset_en/toki-dictionary.json (a path relative to the current
# working directory). For every parsed entry it creates a Word from the
# entry's "word" key, then one record through word.definitions for each item
# under "definitions", using that item's "pos" and "definition" keys.
#
# When the Word table already has rows, only a notice is printed and nothing
# is written.
#
# The inserts run inside a single transaction and use create!, so a record
# that cannot be saved raises and undoes the whole batch. Without that, a
# half-finished ingest would leave rows behind and the guard below would then
# refuse every re-run.
# NOTE(review): assumes Word is an ActiveRecord model with a `definitions`
# association (exists?, transaction, create!) -- confirm.
def ingest_dictionary
  if Word.exists?
    puts "Dictionary data already exists in table! Aborting."
    return
  end

  dictionary = JSON.parse(File.read('db/dataset_en/toki-dictionary.json'))

  Word.transaction do
    dictionary.each do |entry|
      word = Word.create!(word: entry['word'])

      entry['definitions'].each do |definition|
        word.definitions.create!(pos: definition['pos'], definition: definition['definition'])
      end
    end
  end

  puts "Dictionary ingest complete."
end
|