Fiddling about with corpus size in grammar evaluation.
author YeGoblynQueenne@splinter <ep50@uni.brighton.ac.uk>
Fri, 26 Aug 2016 11:58:15 +0000 (14:58 +0300)
committer YeGoblynQueenne@splinter <ep50@uni.brighton.ac.uk>
Fri, 26 Aug 2016 11:58:15 +0000 (14:58 +0300)
* Was trying to make the training corpus small enough to allow nested
  k-fold cross-validation with the entire M:tG corpus, so a few hacks
  are left in, commented out. I think I mostly put everything back the
  way it was before, apart from those hacks and the refactoring of
  recall/4 (see next).
* Changed recall/4 to use findall/3 loops rather than aggregate/4,
  because I was having some issues with nondeterminism with aggregate/4.
* Renamed stochastic_gnf_parsing_time_limit/1 to
  evaluation_parsing_time_limit/1 (since it's now used for rr grammars
  also, and deterministic ones too, both rr and gnf).

lib/project_utilities.pl
tree_learning/configuration.pl
tree_learning/grammar_evaluation.pl

index 6879e71..e894c83 100644 (file)
@@ -733,7 +733,7 @@ dynamic_configuration:-
                ,(member(O-Vs,Os)
                 % Only show gnf-related options when they're relevant.
                 ,(   (    O == production_arity
-                     ;    O == gnf_parsing_time_limit
+                     ;    O == evaluation_parsing_time_limit
                      )
                  ->  (   configuration:transformation_format(gnf)
                      ;   configuration:transformation_format(stochastic_gnf)
index f69b602..4254ca1 100644 (file)
@@ -18,7 +18,7 @@
                        ,rename_built_ins/1
                        ,sentence_completion_inference_limit_multiplier/1
                        ,start_symbol_arity/1
-                       ,stochastic_gnf_parsing_time_limit/1
+                       ,evaluation_parsing_time_limit/1
                        ,cnf_split_divisor/1
                        ,training_set_size/1
                        ,transformation_format/1
@@ -37,7 +37,7 @@
          ,lexicalisation_strategy/1
          ,output_type/1
          ,production_arity/3
-         ,stochastic_gnf_parsing_time_limit/1
+         ,evaluation_parsing_time_limit/1
          ,training_set_size/1
          ,transformation_format/1
          ].
@@ -101,7 +101,7 @@ edit_output_grammar(false).
 %      file. Unlike edit_output_grammar/1 this too may interfere with
 %      cross-validation.
 %
-edit_sentence_frequencies(true).
+edit_sentence_frequencies(false).
 
 
 %!     evaluation_metric(?Grammar_type,?Metric) is nondet.
@@ -162,6 +162,8 @@ examples_file_name(mtg_hand_simulation).
 
 %examples_file_name(trivial_example).
 
+%examples_file_name(wsj_sgml_inflated).
+%examples_file_name(wsj_sgml_deflated).
 
 %!     examples_predicate_name(?Name) is det.
 %
@@ -239,6 +241,7 @@ language_file_name(ability_text).
 %language_file_name(time_series).
 %language_file_name(metal_genres).
 %language_file_name(english).
+%language_file_name(sgml).
 
 
 %!     lexicalisation_strategy(?Strategy:atom) is det.
@@ -514,7 +517,7 @@ start_symbol_arity(N):-
        ,sumlist([A,B],N).
 
 
-%!     stochastic_gnf_parsing_time_limit(?Limit) is semidet.
+%!     evaluation_parsing_time_limit(?Limit) is semidet.
 %
 %      Maximum number of seconds to search for a parse when parsing
 %      with a stochastic GNF grammar.
@@ -522,7 +525,7 @@ start_symbol_arity(N):-
 %      This is used in stochastic grammar evaluation to avoid having to
 %      spend too long in parsing a single string.
 %
-stochastic_gnf_parsing_time_limit(0.5).
+evaluation_parsing_time_limit(0.5).
 
 
 %!     testing_protocol(?Protocol) is det.
index 7a8f4d3..b7060ea 100644 (file)
@@ -150,12 +150,20 @@ nested_k_fold_cross_validation:-
         ;   throw('nested_k_fold_cross_validation/0:Unknown grammar format.')
         )
        ,TG = print_grammar
-       %,examples_corpus(Cs)
+       ,examples_corpus(Cs)
        % This is only to make the corpus smaller to allow cross-validation
        % to exit before next year. Comment out afterwards.
-       ,training_and_test_sets(_, Cs)
+       /*,training_and_test_sets(_, Cs)
        ,length(Cs, Leng)
        ,format('Cross-validating with ~w examples~n',[Leng])
+       */
+
+       % Also here to make a very small corpus for nested cv
+       /*,examples_corpus(Xs)
+       ,k_samples(Xs,1,0.03,[Cs])
+       ,length(Cs, Leng)
+       ,format('Cross-validating with ~w examples~n',[Leng])*/
+
        ,(   member(F,[right_regular, stochastic_right_regular])
        ->   P = [graph_arity(0)-1]
         ;   member(F, [gnf, stochastic_gnf])
@@ -198,7 +206,7 @@ nested_k_fold_cross_validation:-
                ,Performance_)
        ,average(Performance_, Evaluation)
        ,format('~nAverage performance of tuned grammars: ~6f~n', [Evaluation])
-       ,format('~wPerformance measured using ~w~n', [M]).
+       ,format('Performance measured using ~w~n', [M]).
 
 
 
@@ -580,7 +588,7 @@ stochastic_derivation_evaluation(kld, Ss, D):-
 %      I'll have to take a closer look at pdcg_derivation/5 anyway.
 %
 list_pdcg_derivation(Ts,T,L,C,P):-
-       configuration:stochastic_gnf_parsing_time_limit(Sec)
+       configuration:evaluation_parsing_time_limit(Sec)
        ,start_symbol(S)
        ,stochastic_grammar_module(current,M)
        ,sanitised_user_input(Ts,Ts_)
@@ -625,16 +633,25 @@ desanitise_derivation(D,C):-
 %      configuration.
 %
 deterministic_grammar_evaluation:-
-       training_and_test_sets(Ts,Ss)
+       configuration:evaluation_metric(deterministic, M)
+       ,training_and_test_sets(Ts,Ss)
        ,training_and_test_sets_counts(Tc,Sc)
+       % Was trying to make testing corpus really, really small.
+       %,examples_corpus(Cs)
+       %,k_samples(Cs,1,0.03,[S1])
+       %,splits(S1,0.9,Ts,Ss)
+       %,length(Ts, Tc)
+       %,length(Ss,Sc)
        ,format('Training on ~w examples.~n',[Tc])
        ,print_grammar(Ts)
        ,make
        ,format('Testing on ~w examples.~n',[Sc])
-       ,deterministic_grammar_evaluation(f_score, Ss, F)
+       ,deterministic_grammar_evaluation(M, Ss, F)
+       ,capitalise_atom(M, M_)
        ,(   F =.. [f,F_,P,R]
        ->   format('Precision: ~6f~nRecall:    ~6f~nF-score:   ~6f~n',[P,R,F_])
-        ;   format('F-score: ~6f~n',[F])
+             % ^^ Extra spaces justifyish output.
+        ;   format('~w: ~6f~n',[M_,F])
         ).
 
 
@@ -691,7 +708,8 @@ deterministic_grammar_evaluation(precision_recall, Ss, P-R):-
 %      number of parses to attempt before reporting performance.
 %
 precision(St,Ss,N,P):-
-       max_sentence_length(Ss, M)
+       format('precision/4: Evaluating on ~w sentences.~n', [N])
+       ,max_sentence_length(Ss, M)
        ,M_ is ceiling(M)
        %,format('precision/4: Max sentence length ~w~n',[M_])
        % Sort corpus without removing duplicates.
@@ -728,16 +746,40 @@ precision(St,Ss,N,P):-
 %      number of Examples to attempt to parse before reporting results.
 %
 recall(St,Ss,N,P):-
-       aggregate(count
+       % Has some small issues with nondeterminism (strange, I know)
+       /*aggregate(count
                 ,(S,I,M)
                  % Attempt to parse N sentences chosen at random
                 ,(between(1,N,I)
                  ,random_member(S,Ss)
+                 %,format('recall/4: Selected string ~w for parsing.~n', [S])
                  ,length(S,M)
                  % Only attempt derivations at depth M
-                 ,once(dcg_derivation(St,just(M),S))
+                 ,once(dcg_derivation(St,just(M),S)
+                  )
+                 %,format('recall/4: Accepted ~w.~n', [S])
                  )
-                ,Ds)
+                ,Ds)*/
+       configuration:evaluation_parsing_time_limit(Sec)
+       ,format('recall/4: Evaluating on ~w sentences.~n', [N])
+       ,findall(S-M
+              ,(between(1,N,_I)
+               ,random_member(S,Ss)
+               ,length(S, M)
+               )
+              ,Rs)
+       ,findall(S
+               ,(member(S-M, Rs)
+                ,G = dcg_derivation(St,just(M),S)
+                ,catch(call_with_time_limit(Sec,G)
+                      ,time_limit_exceeded
+                      ,(%writeln(S-M)
+                       !
+                       ,fail)
+                      )
+                )
+               ,Ps)
+       ,length(Ps, Ds)
        ,format('recall/4: ~w out of ~w strings in corpus generated.~n',[Ds,N])
        ,P is Ds / N.