Skip to content

Commit

Permalink
Script code updates
Browse files Browse the repository at this point in the history
Specifically, the main change here is in the Latest script,
to allow easier debugging/introspection of issues.

The query is now stored in a variable (so that it can be dumped),
and the $data variable was renamed to $file_data because the old
name was found to be confusing.
  • Loading branch information
mickeyn committed Jun 22, 2018
1 parent 4c8439e commit 90bc452
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 47 deletions.
80 changes: 44 additions & 36 deletions lib/MetaCPAN/Script/Latest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,30 @@ sub run {
? { terms => { "module.name" => \@filter } }
: { exists => { field => "module.name" } };

my $scroll = $self->index->type('file')->filter(
{
bool => {
must => [
{
nested => {
path => 'module',
filter => { bool => { must => \@module_filters } }
}
},
{ term => { 'maturity' => 'released' } },
],
must_not => [
{ term => { status => 'backpan' } },
{ term => { distribution => 'perl' } }
]
}
# This query will be used to produce a (scrolled) list of
# 'file' type records where the module.name matches the
# distribution name and which are released &
# indexed (the 'leading' module)
my $query = {
bool => {
must => [
{
nested => {
path => 'module',
filter => { bool => { must => \@module_filters } }
}
},
{ term => { 'maturity' => 'released' } },
],
must_not => [
{ term => { status => 'backpan' } },
{ term => { distribution => 'perl' } }
]
}
)
};

my $scroll
= $self->index->type('file')->filter($query)
->source(
[qw< author date distribution module.name release status >] )
->size(100)->raw->scroll;
Expand All @@ -122,13 +127,13 @@ sub run {
while ( my $file = $scroll->next ) {
$i++;
log_debug { "$i of " . $scroll->total } unless ( $i % 1000 );
my $data = $file->{_source};
my $file_data = $file->{_source};

# Convert module name into Parse::CPAN::Packages::Fast::Package object.
my @modules = grep {defined}
map {
eval { $p->package( $_->{name} ) }
} @{ $data->{module} };
} @{ $file_data->{module} };

push @modules_to_purge, @modules;

Expand All @@ -152,21 +157,24 @@ sub run {
# (like /\.pm\.gz$/) so distvname might not be present.
# I assume cpanid always will be.
if ( defined( $dist->distvname )
&& $dist->distvname eq $data->{release}
&& $dist->cpanid eq $data->{author} )
&& $dist->distvname eq $file_data->{release}
&& $dist->cpanid eq $file_data->{author} )
{
my $upgrade = $upgrade{ $data->{distribution} };
my $upgrade = $upgrade{ $file_data->{distribution} };

# If multiple versions of a dist appear in 02packages
# only mark the most recent upload as latest.
next
if ( $upgrade
&& $self->compare_dates( $upgrade->{date}, $data->{date} )
if (
$upgrade
&& $self->compare_dates(
$upgrade->{date}, $file_data->{date}
)
);
$upgrade{ $data->{distribution} } = $data;
$upgrade{ $file_data->{distribution} } = $file_data;
}
elsif ( $data->{status} eq 'latest' ) {
$downgrade{ $data->{release} } = $data;
elsif ( $file_data->{status} eq 'latest' ) {
$downgrade{ $file_data->{release} } = $file_data;
}
}
}
Expand All @@ -176,28 +184,28 @@ sub run {
type => 'file'
);

while ( my ( $dist, $data ) = each %upgrade ) {
while ( my ( $dist, $file_data ) = each %upgrade ) {

# Don't reindex if already marked as latest.
# This just means that it hasn't changed (query includes 'latest').
next if ( !$self->force and $data->{status} eq 'latest' );
next if ( !$self->force and $file_data->{status} eq 'latest' );

$self->reindex( $bulk, $data, 'latest' );
$self->reindex( $bulk, $file_data, 'latest' );
}

while ( my ( $release, $data ) = each %downgrade ) {
while ( my ( $release, $file_data ) = each %downgrade ) {

# Don't downgrade if this release version is also marked as latest.
# This could happen if a module is moved to a new dist
# but the old dist remains (with other packages).
# This could also include bug fixes in our indexer, PAUSE, etc.
next
if ( !$self->force
&& $upgrade{ $data->{distribution} }
&& $upgrade{ $data->{distribution} }->{release} eq
$data->{release} );
&& $upgrade{ $file_data->{distribution} }
&& $upgrade{ $file_data->{distribution} }->{release} eq
$file_data->{release} );

$self->reindex( $bulk, $data, 'cpan' );
$self->reindex( $bulk, $file_data, 'cpan' );
}
$bulk->flush;
$self->index->refresh;
Expand Down
20 changes: 10 additions & 10 deletions lib/MetaCPAN/Script/Mapping.pm
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ sub copy_type {
sub _copy_slice {
my ( $self, $query, $index, $type ) = @_;

my $scroll = $self->es()->scroll_helper(
my $scroll = $self->es->scroll_helper(
search_type => 'scan',
size => 250,
scroll => '10m',
Expand Down Expand Up @@ -341,7 +341,7 @@ sub empty_type {
max_count => 500,
);

my $scroll = $self->es()->scroll_helper(
my $scroll = $self->es->scroll_helper(
search_type => 'scan',
size => 250,
scroll => '10m',
Expand Down Expand Up @@ -381,37 +381,37 @@ sub deploy_mapping {
author =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Author::mapping),
distribution =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Distribution::mapping
decode_json( MetaCPAN::Script::Mapping::CPAN::Distribution::mapping
),
favorite =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Favorite::mapping
decode_json( MetaCPAN::Script::Mapping::CPAN::Favorite::mapping
),
file =>
decode_json(MetaCPAN::Script::Mapping::CPAN::File::mapping),
mirror =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Mirror::mapping),
permission =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Permission::mapping
decode_json( MetaCPAN::Script::Mapping::CPAN::Permission::mapping
),
package =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Package::mapping
decode_json( MetaCPAN::Script::Mapping::CPAN::Package::mapping
),
rating =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Rating::mapping),
release =>
decode_json(MetaCPAN::Script::Mapping::CPAN::Release::mapping
decode_json( MetaCPAN::Script::Mapping::CPAN::Release::mapping
),
},

user => {
account =>
decode_json(MetaCPAN::Script::Mapping::User::Account::mapping
decode_json( MetaCPAN::Script::Mapping::User::Account::mapping
),
identity =>
decode_json(MetaCPAN::Script::Mapping::User::Identity::mapping
decode_json( MetaCPAN::Script::Mapping::User::Identity::mapping
),
session =>
decode_json(MetaCPAN::Script::Mapping::User::Session::mapping
decode_json( MetaCPAN::Script::Mapping::User::Session::mapping
),
},
contributor => {
Expand Down
2 changes: 1 addition & 1 deletion lib/MetaCPAN/Script/Session.pm
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ with 'MetaCPAN::Role::Script', 'MooseX::Getopt';
sub run {
my $self = shift;

my $scroll = $self->es()->scroll_helper(
my $scroll = $self->es->scroll_helper(
size => 10_000,
scroll => '1m',
index => 'user',
Expand Down

0 comments on commit 90bc452

Please sign in to comment.